From b18f38d8129360973c360db8a53b48c56dc73408 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?= Date: Thu, 7 Sep 2017 00:29:59 +0200 Subject: [PATCH] extensions: make float-half extension use exact LUTs Conversion from half to float now uses a table that is initialized on load, and conversion from float to half uses new tables and a faster approach from qcms / mozilla / webkit. --- extensions/float-half.c | 161 +++++++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 52 deletions(-) diff --git a/extensions/float-half.c b/extensions/float-half.c index 08b7dfb..b471a02 100644 --- a/extensions/float-half.c +++ b/extensions/float-half.c @@ -75,7 +75,7 @@ #include "babl.h" #include "extensions/util.h" -static void halfp2singles(void *target, const void *source, long numel) +static void halfp2singles_fun(void *target, const void *source, long numel) { uint16_t *hp = (uint16_t *) source; // Type pun input as an unsigned 16-bit int uint32_t *xp = (uint32_t *) target; // Type pun output as an unsigned 32-bit int @@ -122,59 +122,107 @@ static void halfp2singles(void *target, const void *source, long numel) } } +static float half_float_table[65536]; /* float value of each 16-bit half pattern, filled in init() */ + +static void halfp2singles(void *target, const void *source, long numel) +{ + const uint16_t *src = (const uint16_t *) source; + float *dst = (float *) target; + long i; /* same width as numel, so the index cannot overflow before reaching the bound */ + for (i = 0; i < numel; i++) + { + dst[i] = half_float_table[src[i]]; + } +} + +/* table-based float to half conversion, after qcms/blink/webkit */ + +const unsigned short half_float_base_table[512] = { +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,1,2,4,8,16,32,64,128,256, +512,1024,2048,3072,4096,5120,6144,7168,8192,9216,10240,11264,12288,13312,14336,15360, +16384,17408,18432,19456,20480,21504,22528,23552,24576,25600,26624,27648,28672,29696,30720,31744, 
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768, +32768,32768,32768,32768,32768,32768,32768,32769,32770,32772,32776,32784,32800,32832,32896,33024, +33280,33792,34816,35840,36864,37888,38912,39936,40960,41984,43008,44032,45056,46080,47104,48128, +49152,50176,51200,52224,53248,54272,55296,56320,57344,58368,59392,60416,61440,62464,63488,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, 
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512, +64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512 +}; + +const unsigned char half_float_shift_table[512] = { /* indexed by the 9-bit sign+exponent field of a float; gives the right shift float_to_half_float applies to the 23 mantissa bits */ +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,23,22,21,20,19,18,17,16,15, +14,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,13, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,23,22,21,20,19,18,17,16,15, +14,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24, +24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,13 +}; + +static inline unsigned short 
float_to_half_float(float f) +{ + // See Blink::Source/platform/graphics/gpu/WebGLImageConversion.cpp::convertFloatToHalfFloat() and http://crbug.com/491784 + union { + float f; + uint32_t u; + } u = {f}; /* view the float's bit pattern as a 32-bit unsigned integer */ + unsigned temp = u.u; + unsigned signexp = (temp >> 23) & 0x1ff; /* bits 23..31 (sign + exponent): 9-bit index into both tables */ + return half_float_base_table[signexp] + ((temp & 0x007fffff) >> half_float_shift_table[signexp]); /* mantissa bits shifted out are dropped, no rounding step */ +} + static void singles2halfp(void *target, const void *source, long numel) { - uint16_t *hp = (uint16_t *) target; // Type pun output as an unsigned 16-bit int - uint32_t *xp = (uint32_t *) source; // Type pun input as an unsigned 32-bit int - uint16_t hs, he, hm; - uint32_t x, xs, xe, xm; - int hes; - - if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real) - return; - } - while( numel-- ) { - x = *xp++; - if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero - *hp++ = (uint16_t) (x >> 16); // Return the signed zero - } else { // Not zero - xs = x & 0x80000000u; // Pick off sign bit - xe = x & 0x7F800000u; // Pick off exponent bits - xm = x & 0x007FFFFFu; // Pick off mantissa bits - if( xe == 0 ) { // Denormal will underflow, return a signed zero - *hp++ = (uint16_t) (xs >> 16); - } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set) - if( xm == 0 ) { // If mantissa is zero ... 
- *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf - } else { - *hp++ = (uint16_t) 0xFE00u; // NaN, only 1st mantissa bit set - } - } else { // Normalized number - hs = (uint16_t) (xs >> 16); // Sign bit - hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp - if( hes >= 0x1F ) { // Overflow - *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf - } else if( hes <= 0 ) { // Underflow - if( (14 - hes) > 24 ) { // Mantissa shifted all the way off & no rounding possibility - hm = (uint16_t) 0u; // Set mantissa to zero - } else { - xm |= 0x00800000u; // Add the hidden leading bit - hm = (uint16_t) (xm >> (14 - hes)); // Mantissa - if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding - hm += (uint16_t) 1u; // Round, might overflow into exp bit, but this is OK - } - *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero - } else { - he = (uint16_t) (hes << 10); // Exponent - hm = (uint16_t) (xm >> 13); // Mantissa - if( xm & 0x00001000u ) // Check for rounding - *hp++ = (hs | he | hm) + (uint16_t) 1u; // Round, might overflow to inf, this is OK - else - *hp++ = (hs | he | hm); // No rounding - } - } - } - } + const float *src = source; + uint16_t *dst = target; /* one 16-bit half per element; a uint8_t pointer would keep only the low byte of each result */ + long i; /* same width as numel */ + for (i = 0; i < numel; i++) + dst[i] = float_to_half_float (src[i]); } static inline long @@ -232,6 +280,7 @@ int init (void); int init (void) { + int i; const Babl *rgbaF_linear = babl_format_new ( babl_model ("RGBA"), babl_type ("float"), @@ -337,6 +386,14 @@ init (void) babl_component ("Y'"), NULL); + for (i = 0; i < 65536; i++) + { + uint16_t buf[2] = {i, i}; + float fbuf[2]; + halfp2singles_fun(fbuf, buf, 1); + half_float_table[i] = fbuf[0]; + } + #define CONV(src, dst) \ { \ babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \ -- 2.30.2